home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
BBS in a Box 7
/
BBS in a Box - Macintosh - Volume VII (BBS in a Box) (January 1993).iso
/
Files
/
Prog
/
Q-R
/
R⁄O strsml.cpt
/
Ratcliff_Obershelp
/
strsml1.c
< prev
next >
Wrap
Text File
|
1989-01-12
|
2KB
|
69 lines
/*
simil(s1, s2) - returns a value signifying how similar two
strings are (0 - completely different, 100 - exactly the same)
using the Ratcliff/Obershelp pattern matching algorithm.
(Code courtesy of Rick Orsborn published in DDJ, Nov 1988, #145, pg.12, 118).
*/
int simil(s1, s2)
char *s1, *s2;
{
short l1, l2;
l1 = strlen(s1);
l2 = strlen(s2);
if (l1 == 1) /* check for the easiest case */
if (l2 ==1)
if (*s1 == *s2)
return (100);
return (200 * GCsubstr(s1, s1 + l1, s2, s2 + l2) / (l1 + l2));
}
/*
GCsubstr(st1, end1, st2, end2) - returns the length of the greatest
common substring using recursion. st1 and st2 are the two strings.
end1 and end2 are pointers to the ends of the strings.
*/
int GCsubstr(st1, end1, st2, end2)
char *st1, *end1, *st2, *end2;
{
register char *a1, *b1, *s1, *a2, *b2, *s2;
short max, i;
if (end1 <= st1) /* st1 is empty */
return (0);
if (end2 <= st2) /* st2 is empty */
return (0);
if (end1 == st1 + 1) /* if s1 has one char */
if (end2 == st2 + 1) /* and s2 has one char, */
return (0); /* they cannot be equal */
max = 0;
b1 = end1;
b2 = end2;
for (a1 = st1; a1 < b1; a1++)
for (a2 = st2; a2 < b2; a2++)
if (*a1 == *a2)
{
for (i = 1; a1[i] && (a1[i] == a2[i]); i++)
/* how long is the common substring? */;
if (i > max)
{
max = i;
s1 = a1;
s2 = a2;
b1 = end1 - max;
b2 = end2 - max;
}
}
if (! max)
return (0);
max += GCsubstr(s1 + max, end1, s2 + max, end2); /* right substring */
max += GCsubstr(st1, s1, st2, s2); /* left substring */
return (max);
}